import pandas as pd
import geopandas as gpd
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import gmaps
import gmaps.datasets
import requests
import re
# jupyter nbconvert --to html --TemplateExporter.exclude_input=True Eesti_haldusjaotus.ipynb --output='EST_regions.html'
NUTS 3 units based on 2018 counties (15):
- EE001 --- North (Harju County)
- EE004 --- West (Hiiu County, Lääne County, Pärnu County, Saare County)
- EE008 --- South (Jõgeva County, Põlva County, Tartu County, Valga County, Viljandi County, Võru County)
- EE009 --- Central (Järva County, Lääne-Viru County, Rapla County)
- EE00A --- N_East (Ida-Viru County)
Only one unit at NUTS 1 and 2 level; five level 3 units
# 2018 county recode to NUTS 3
keys = ['Harju maakond',
'Hiiu maakond','Lääne maakond','Pärnu maakond','Saare maakond',
'Järva maakond','Lääne-Viru maakond','Rapla maakond',
'Ida-Viru maakond',
'Jõgeva maakond','Põlva maakond','Tartu maakond','Valga maakond','Viljandi maakond','Võru maakond']
values = ['North',
'West','West','West','West',
'Central','Central','Central',
'N_East',
'South','South','South','South','South','South']
nuts_dict = {key: [] for key in keys}
for k, v in zip(keys, values):
nuts_dict[k].append(v)
This will be the base plot to compare historical borders to.
# current county map
mknd2021 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/2021/maakond_2021-01-01.shp')
mknd2021['NUTS'] = mknd2021['MNIMI'].replace(nuts_dict)
mknd2021['MNIMI2'] = mknd2021['MNIMI'].str.split().str.get(0)
fig, ax = plt.subplots(figsize=(20,10))
mknd2021.plot(column='NUTS', edgecolor='black', ax=ax, cmap='Pastel2')
ax.set_title("2021 county borders (black), colour-filled by NUTS 3 regions")
mknd2021.apply(lambda x: ax.annotate(text=x.MNIMI2, xy=x.geometry.centroid.coords[0], ha='center', size=16), axis=1)
ax.set_axis_off()
Historical administrative border maps made available by Estonian land board (these are .SHP shape-files!).
Below, I will colour-fill the country map of Estonia by NUTS 3 regions and overlay historical county/rayon and smaller admin.unit borders to it.
Starting with 1997 county borders over 2021 NUTS 3 areas and moving backwards in time.
mknd1997 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1997/maakond_1997-12-02.shp')
# ov1997 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1997/omavalitsus_1997-12-02.shp')
fig, ax = plt.subplots(figsize=(20, 10))
q = mknd2021.plot(column='NUTS', cmap='Pastel2', ax = ax)
q = mknd1997.plot(color='none', edgecolor='black', linewidths=2, ax=ax)
# q = ov1997.plot(color='none', edgecolor='red', linewidths=0.5, ax=ax)
q.set_title("1997 counties (black), local gov-s (red) over NUTS 3 (colour-fill)")
mknd2021.apply(lambda x: ax.annotate(text=x.MNIMI2, xy=x.geometry.centroid.coords[0], ha='center', size=16), axis=1)
q.set_axis_off()
rjn1989 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1989/Rajoon_1989.shp')
# kn1989 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1989/Kylanoukogu_1989.shp')
fig, ax = plt.subplots(figsize=(20, 10))
p = mknd2021.plot(column='NUTS', cmap='Pastel2', ax = ax)
p = rjn1989.plot(color='none', edgecolor='black', linewidths=2, ax=ax)
#p = kn1989.plot(color='none', edgecolor='red', linewidths=0.5, ax=ax)
p.set_title("1989 rayons (black), selsoviets (red) over NUTS 3 (colour-fill)")
rjn1989.apply(lambda x: ax.annotate(text=x.NIMI, xy=x.geometry.centroid.coords[0], ha='center', size=16), axis=1)
p.set_axis_off()
rjn1979 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1979/Rajoon_1979.shp')
# kn1979 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1979/Kylanoukogu_1979.shp')
fig, ax = plt.subplots(figsize=(20, 10))
q = mknd2021.plot(column='NUTS', cmap='Pastel2', ax = ax)
q = rjn1979.plot(color='none', edgecolor='black', linewidths=2, ax=ax)
# q = kn1979.plot(color='none', edgecolor='red', linewidths=0.5, ax=ax)
q.set_title("1979 rayons (black), selsoviets (red) over NUTS 3 (colour-fill)")
rjn1979.apply(lambda x: ax.annotate(text=x.NIMI, xy=x.geometry.centroid.coords[0], ha='center', size=16), axis=1)
q.set_axis_off()
rjn1959 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1959/Rajoon_1959.shp')
# kn1959 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1959/Kylanoukogu_1959.shp')
fig, ax = plt.subplots(figsize=(20, 10))
p = mknd2021.plot(column='NUTS', cmap='Pastel2', ax = ax)
p = rjn1959.plot(color='none', edgecolor='black', linewidths=2, ax=ax)
# p = kn1959.plot(color='none', edgecolor='red', linewidths=0.5, ax=ax)
p.set_title("1959 rayons (black), selsoviets (red) over NUTS 3 (colour-fill)")
rjn1959.apply(lambda x: ax.annotate(text=x.NIMI, xy=x.geometry.centroid.coords[0], ha='center', size=12), axis=1)
p.set_axis_off()
# 1959 rayon recode to NUTS3
keys3 = ['Harju','Keila','Tallinn',
'Hiiumaa','Haapsalu','Lihula','Pärnu','Kingissepa','Vändra',
'Märjamaa','Rapla','Paide','Tapa','Väike-Maarja','Rakvere',
'Narva','Kohtla-Järve','Jõhvi','Sillamäe',
'Jõgeva','Põltsamaa','Põlva','Tartu','Elva','Räpina','Valga','Viljandi','Võru','Abja']
values3 = ['North','North','North',
'West','West','West','West','West','West',
'Central','Central','Central','Central','Central','Central',
'N_East','N_East','N_East','N_East',
'South','South','South','South','South','South','South','South','South','South']
rayon_dict = {key: [] for key in keys3}
for k, v in zip(keys3, values3):
rayon_dict[k].append(v)
mknd1938 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1938/Maakond_1938.shp')
# vald1938 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1938/Vald_1938.shp')
fig, ax = plt.subplots(figsize=(20, 10))
q = mknd2021.plot(column='NUTS', cmap='Pastel2', ax = ax)
# q = vald1938.plot(color='none', edgecolor='red', linewidths=0.5, ax=ax)
q = mknd1938.plot(color='none', edgecolor='black', linewidths=2, ax=ax)
q.set_title("1938 counties (black), parishes (red) over NUTS 3 (colour-fill)")
mknd1938.apply(lambda x: ax.annotate(text=x.NIMI, xy=x.geometry.centroid.coords[0], ha='center', size=16), axis=1)
q.set_axis_off()
mknd1922 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1922/Maakond_1922.shp')
# vald1922 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1922/Vald_1922.shp')
fig, ax = plt.subplots(figsize=(20, 10))
p = mknd2021.plot(column='NUTS', cmap='Pastel2', ax = ax)
# p = vald1922.plot(color='none', edgecolor='red', linewidths=0.5, ax=ax)
p = mknd1922.plot(color='none', edgecolor='black', linewidths=2, ax=ax)
p.set_title("1922 counties (black), parishes (red) over NUTS 3 (colour-fill)")
mknd1922.apply(lambda x: ax.annotate(text=x.NIMI, xy=x.geometry.centroid.coords[0], ha='center', size=16), axis=1)
p.set_axis_off()
mknd1917 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1917/Maakond_1917.shp')
# khlk1917 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1917/Kihelkond_kuni_1917.shp')
# mois1917 = gpd.read_file('/Users/martin/Documents/Loendused/Kaardid/1917/moisamaad_1917.shp')
fig, ax = plt.subplots(figsize=(26, 12))
p = mknd2021.plot(column='NUTS', cmap='Pastel2', ax = ax)
# p = khlk1917.plot(color='none', edgecolor='red', linewidths=0.5, ax=ax)
p = mknd1917.plot(color='none', edgecolor='black', linewidths=2, ax=ax)
# p = mois1917.plot(color='none', edgecolor='blue', linewidths=0.5, ax=ax)
p.set_title("1917 counties (black), kihelkond (red) over NUTS 3 (colour-fill)")
mknd1917.apply(lambda x: ax.annotate(text=x.NIMI, xy=x.geometry.centroid.coords[0], ha='center', size=16), axis=1)
p.set_axis_off()
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly
import plotly.graph_objs as go
import plotly.express as px
from IPython.display import Markdown as md
init_notebook_mode(connected=False)
cens_files = glob.glob('/Users/martin/Documents/Loendused/Stat_ee/*.csv')
cens_files
['/Users/martin/Documents/Loendused/Stat_ee/loendus_1934.csv', '/Users/martin/Documents/Loendused/Stat_ee/loendus_1897.csv', '/Users/martin/Documents/Loendused/Stat_ee/loendus_1881.csv', '/Users/martin/Documents/Loendused/Stat_ee/loendus_1922.csv', '/Users/martin/Documents/Loendused/Stat_ee/loendus_1979.csv', '/Users/martin/Documents/Loendused/Stat_ee/loendus_1989.csv', '/Users/martin/Documents/Loendused/Stat_ee/loendus_1959.csv', '/Users/martin/Documents/Loendused/Stat_ee/loendus_1970.csv']
# 1959 census
df_1959 = pd.concat([pd.read_csv(f) for f in cens_files[6:7]],
ignore_index=True)
df_1959 = df_1959[pd.to_numeric(df_1959['Unnamed: 0'], errors='coerce').notnull()] # ainult nummerdatud read
df_1959 = df_1959[~df_1959['Rajoon, linn'].isin(['Kogurahvastik','Linnarahvastik','Maarahvastik'])] # rajoonid + linnad
print(df_1959['Kokku'].sum())
1196791.0
df_1959['Rajoon, linn'] = df_1959['Rajoon, linn'].str.title()
df_1959['NUTS3'] = df_1959['Rajoon, linn'].replace(rayon_dict)
df_1959 = df_1959.groupby(['Aasta', 'NUTS3'])['Kokku'].agg('sum').reset_index()
df_1959
| Aasta | NUTS3 | Kokku | |
|---|---|---|---|
| 0 | 1959 | Central | 154360.0 |
| 1 | 1959 | N_East | 133093.0 |
| 2 | 1959 | North | 353919.0 |
| 3 | 1959 | South | 392337.0 |
| 4 | 1959 | West | 163082.0 |
import json
import requests
import urllib.request
import pandas as pd
import pyjstat
from pyjstat import pyjstat
from collections import OrderedDict
import pandas as pd
import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px
init_notebook_mode(connected=False)
# Population by county 1990-2017
url = "https://andmed.stat.ee/api/v1/en/stat/RV022"
payload = {
"query": [
{
"code": "Maakond",
"selection": {
"filter": "item",
"values": [
"2", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "16", "17", "18", "19"
]
}
},
{
"code": "Sugu",
"selection": {
"filter": "item",
"values": [
"K"
]
}
},
{
"code": "Vanuserühm",
"selection": {
"filter": "item",
"values": [
"999"
]
}
}
],
"response": {
"format": "json-stat2"
}
}
data = requests.post(url, data=json.dumps(payload))
r = pyjstat.from_json_stat(data.json(object_pairs_hook=OrderedDict))
# print(r)
df = pd.DataFrame(r[0], columns=['Year','County','Sex','Age group','value'])
df['County'] = df.County.str.split(" ").str[0]
df.loc[df.County == 'County', 'County'] = 'Unknown'
dat_edk = pd.read_csv("/Users/martin/Documents/Loendused/EKDK/maakonnad_1965-1990.csv")
dat_edk = dat_edk[:-1]
df2 = pd.concat([dat_edk.melt(id_vars='Year', var_name="County"),df])
# County name recode to NUTS3
keys2 = ['Harju',
'Hiiu','Lääne','Pärnu','Saare',
'Järva','Lääne-Viru','Rapla',
'Ida-Viru',
'Jõgeva','Põlva','Tartu','Valga','Viljandi','Võru']
values2 = ['North',
'West','West','West','West',
'Central','Central','Central',
'N_East',
'South','South','South','South','South','South']
nuts_dict2 = {key: [] for key in keys2}
for k, v in zip(keys2, values2):
nuts_dict2[k].append(v)
df3 = df2[['Year','County','value']].copy()
df3['NUTS3'] = df3['County'].replace(nuts_dict2)
df3 = df3[['Year','value','NUTS3']]
df3 = df3.groupby(['Year', 'NUTS3'])['value'].agg('sum').reset_index()
df_1959 = df_1959.rename({'Aasta': 'Year', 'Kokku': 'value'}, axis=1)
df4 = pd.concat([df3, df_1959])
df4 = df4.rename({'value':'Population'}, axis=1)
px.defaults.width = 1000
px.defaults.height = 500
fig = px.bar(df4, x="Year", y="Population",
color="NUTS3", hover_name="NUTS3", log_y=False)
fig.update_layout(title="Population by NUTS3 units 1959-2017")
fig.show()
url = "https://andmed.stat.ee/api/v1/en/stat/RV11"
payload = {
"query": [
{
"code": "Maakond",
"selection": {
"filter": "item",
"values": ["37","39","44","49","51","57","59","65","67","70","74","78","82","84","86","99"]
}
}
],
"response": {
"format": "json-stat2"
}
}
data = requests.post(url, data=json.dumps(payload))
r = pyjstat.from_json_stat(data.json(object_pairs_hook=OrderedDict))
births = pd.DataFrame(r[0], columns=['Year','County','value'])
births['County'] = births.County.str.split(" ").str[0]
births.loc[births.County == 'County', 'County'] = 'Unknown'
births['Year'] = pd.to_numeric(births['Year'])
births['NUTS3'] = births['County'].replace(nuts_dict2)
births = births.groupby(['Year', 'NUTS3'])['value'].agg('sum').reset_index()
births = births.rename({'value':'Births'}, axis=1)
fig = px.bar(births, x="Year", y="Births",
color="NUTS3", hover_name="NUTS3", log_y=False)
fig.update_layout(title="Number of births")
fig.show()
url = "https://andmed.stat.ee/api/v1/en/stat/RV52"
payload = {
"query": [
{
"code": "Sugu",
"selection": {
"filter": "item",
"values": ["1"]
}
},
{
"code": "Asustusüksuse liik",
"selection": {
"filter": "item",
"values": ["1"]
}
},
{
"code": "Elukoht",
"selection": {
"filter": "item",
"values": ["H2","37","39","44","49","51","57","59","65","67","70","74","78","82","84","86"]
}
},
{
"code": "Vanuserühm",
"selection": {
"filter": "item",
"values": ["2"]
}
}
],
"response": {"format": "json-stat2"}
}
data = requests.post(url, data=json.dumps(payload))
r = pyjstat.from_json_stat(data.json(object_pairs_hook=OrderedDict))
imr = pd.DataFrame(r[0], columns=['Year','Place of residence','value'])
imr = imr.rename({'Place of residence': 'County'}, axis=1)
imr['County'] = imr.County.str.split(" ").str[0]
imr.loc[imr.County == 'Place', 'County'] = 'Unknown'
dat_edk_imr = pd.read_csv("/Users/martin/Documents/Loendused/EKDK/infant_deaths_1965-1990.csv")
dat_edk_imr = dat_edk_imr[:-2]
imr3 = pd.concat([dat_edk_imr.drop('Total',axis=1).melt(id_vars='Year', var_name="County"), imr])
# imr3 = imr2[['Year','County','value']].copy()
imr3['NUTS3'] = imr3['County'].replace(nuts_dict2)
#imr3 = imr3[['Year','value','NUTS3']]
imr3 = imr3.groupby(['Year', 'NUTS3'])['value'].agg('sum').reset_index()
imr3 = imr3.rename({'value':'Inf.Deaths'}, axis=1)
imr3['Year'] = pd.to_numeric(imr3['Year'])
fig = px.bar(imr3, x="Year", y="Inf.Deaths",
color="NUTS3", hover_name="NUTS3", log_y=False)
fig.update_layout(title="Number of infant deaths")
fig.show()
# Infant mortality rate
imr4 = pd.merge(imr3, births, on=['NUTS3','Year'])
imr4['IMR'] = imr4['Inf.Deaths']/imr4['Births']*1000
fig = px.line(imr4, x="Year", y="IMR", color="NUTS3", hover_name="NUTS3", symbol="NUTS3")
fig.update_layout(title="Infant mortality rate")
fig.show()
# Regional GDP 1995-2020
#Post Request
url = "https://andmed.stat.ee/api/v1/en/stat/RAA0051"
payload = {
"query": [
{
"code": "Maakond",
"selection": {
"filter": "item",
"values": ["37","39","44","49","51","57","59","65","67","70","74","78","82","84","86"]
}
},
{
"code": "Komponent/Majandussektor",
"selection": {
"filter": "item",
"values": [
"1",
"2",
"3"
]
}
},
{
"code": "Näitaja",
"selection": {
"filter": "item",
"values": [
"1"
]
}
}
],
"response": {
"format": "json-stat2"
}
}
data = requests.post(url, data=json.dumps(payload))
results = pyjstat.from_json_stat(data.json(object_pairs_hook=OrderedDict))
# print(results)
df = pd.DataFrame(results[0], columns=['Year','County','Component/ Economic sector','value'])
df = df.rename({'Component/ Economic sector':'Sector'}, axis=1)
df.tail()
# px.defaults.template = "ggplot2"
# px.defaults.color_continuous_scale = px.colors.sequential.Blackbody
px.defaults.width = 1000
px.defaults.height = 700
df.Sector.value_counts()
df['NUTS3'] = df['County'].str.split().str.get(0)
df['NUTS3'] = df['NUTS3'].replace(nuts_dict2)
df1 = df.groupby(['Year', 'NUTS3','Sector'])['value'].agg('sum').reset_index()
fig = px.line(df1, x="Year", y="value", color="NUTS3", hover_name="NUTS3", facet_col="Sector", log_y=True)
fig.show()
# Regional GDP 1995-2020
#Post Request
url = "https://andmed.stat.ee/api/v1/en/stat/RAA0053"
payload = {
"query": [
{
"code": "Piirkond",
"selection": {
"filter": "item",
"values": [
"2",
"3",
"4",
"5",
"6"
]
}
},
{
"code": "Näitaja",
"selection": {
"filter": "item",
"values": [
"3"
]
}
}
],
"response": {
"format": "json-stat2"
}
}
data = requests.post(url, data=json.dumps(payload))
results = pyjstat.from_json_stat(data.json(object_pairs_hook=OrderedDict))
# print(results)
df = pd.DataFrame(results[0], columns=['Year','Region','Indicator','value'])
df.tail()
fig = px.bar(df, x="Year", y="value", color="Region")
fig.show()
ISIC -- International Standard Industrial Classification of All Economic Activities
NRTAK -- Soviet analogue of ISIC
emp79 = pd.read_csv("/Users/martin/Documents/Loendused/EKDK/tooala_79.csv")
emp89 = pd.read_csv("/Users/martin/Documents/Loendused/EKDK/tooala_89.csv")
emp = pd.concat([emp79, emp89])
emp = emp[emp.NRTAK!='Nonactive']
print(emp['NRTAK'].unique())
['Industry' 'Agriculture' 'Transport' 'Communication' 'Construction' 'Trade' 'OMP' 'Services' 'Healthcare' 'Education' 'PubAdmin' 'Unknown']
emp2 = emp.melt(id_vars=['NRTAK','Year'], var_name="County")
emp2 = emp2[emp2['County']!='Total']
emp2['NUTS3'] = emp2['County'].replace(nuts_dict2)
emp2 = emp2.groupby(['NUTS3','NRTAK','Year'])['value'].agg('sum').reset_index()
fig = px.bar(emp2.sort_values('value'), x="Year", y="value", color="NRTAK", facet_col='NUTS3',
color_discrete_sequence=px.colors.qualitative.Alphabet)
fig.update_layout(
title = 'Number of employees by NRTAK & NUTS3', yaxis_title="Count")
fig.update_xaxes(type='category')
fig.show()
ISIC_to_NRTAK_dict = dict({'Põllumajandus - Agriculture': 'Agriculture',
'Tööstus - Industry': 'Industry',
'Ehitus - Construction': 'Construction',
'Transport - Transportation': 'Transport',
'Haridus - Education': 'Education',
'Kaubandus - Trade': 'Trade',
'Tervishoid - Health care': 'Healthcare',
'Muud teenused - Other services': 'Services',
'Avalik teenistus - Public service': 'PubAdmin',
'Kinnisvara ja äri - Real estate and business': 'Services',
'Kaevandus - Mining': 'Industry',
'Eramajapidamised - Private household': 'OMP',
'Hotellid-restoranid - Hotels-restaurants': 'Services',
'Elektri-veevarustus - Electricity and water supply': 'Services',
'Kalandus - Fishing': 'Agriculture',
'Rahandus - Finance': 'Services'})
pd.set_option("display.max_rows", None)
dat = pd.read_stata('/Users/martin/Documents/Loendused/Tulu-uuringud/tulu.dta')
dat = dat.loc[dat.aasta > 1958]
#dict_raj = dict({1:'Harju', 2:'Hiiu', 3: 'Ida-Viru', 4:'Jõgeva', 5:'Järva', 6:'Lääne', 7:'Lääne-Viru', 8:'Põlva',
# 9:'Pärnu', 10:'Rapla', 11:'Saare', 12:'Tartu', 13:'Valga', 14:'Viljandi', 15:'Võru'})
dat['T89'] = dat['a0ter89']
dat['T89'] = dat['T89'].astype('Int64').apply('{:0>4}'.format)
dat['T89'] = pd.to_numeric(dat['T89'].astype(str).str[:2]).astype('Int64').replace(dict_raj)
dat.loc[dat['T89']==0, 'T89'] = dat['a0ter59'].str.split().str.get(0)
dat['NUTS3'] = dat['T89'].replace(nuts_dict2)
dat['NUTS3'] = dat['NUTS3'].replace(rayon_dict)
dat = dat.drop(dat[(dat['NUTS3'].isin([90,99,'Nuia']))].index)
dat.aasta.value_counts()
1975 6562 1981 5672 Name: aasta, dtype: int64
dat = dat.drop(dat[(dat['sektor'].isin(['NA']))].index)
# pd.crosstab([dat.aasta,dat.NUTS3], dat.sektor)
dat.sektor.value_counts()
Tertsiaar - Tertiary 4566 Sekundaar - Secondary 4179 Primaar - Primary 3484 NA 0 Name: sektor, dtype: int64
dat['NRTAK'] = dat['ISIC'].replace(ISIC_to_NRTAK_dict)
# pd.crosstab(dat['NRTAK'],dat['sektor'])
dat0 = dat.groupby(['NRTAK','aasta','NUTS3'])['ttasu'].agg(['mean']).reset_index().rename({'aasta':'Year'}, axis=1)
# print(dat0.pivot_table(index=['NRTAK'], columns=['Year','NUTS3'],values='mean').round(0))
dat0 = dat[dat.aasta==1981].groupby(['NRTAK','NUTS3'])['ttasu'].agg(['mean']).reset_index()
dat0.pivot_table(index=['NRTAK'], columns=['NUTS3'],values='mean').round(0)
| NUTS3 | Central | N_East | North | South | West |
|---|---|---|---|---|---|
| NRTAK | |||||
| Agriculture | 245.0 | 209.0 | 215.0 | 218.0 | 229.0 |
| Construction | 240.0 | 214.0 | 186.0 | 224.0 | 226.0 |
| Education | 155.0 | 159.0 | 170.0 | 152.0 | 143.0 |
| Healthcare | 122.0 | 128.0 | 160.0 | 135.0 | 128.0 |
| Industry | 190.0 | 200.0 | 203.0 | 199.0 | 193.0 |
| PubAdmin | 123.0 | 218.0 | 184.0 | 138.0 | 160.0 |
| Services | 146.0 | 133.0 | 169.0 | 149.0 | 148.0 |
| Trade | 179.0 | 127.0 | 144.0 | 163.0 | 152.0 |
| Transport | 225.0 | 204.0 | 178.0 | 224.0 | 196.0 |
emp2['Year'] = emp2['Year'].astype('category')
dt['total'] = dt['value']*dt['mean']*12/1000_000
dt.head()
dt0 = dt.groupby(['Year','NUTS3'])['total'].agg(['sum']).reset_index()
dt0.pivot_table(index=['NUTS3'], columns=['Year'],values='sum',aggfunc= 'sum',margins=True).round(0).iloc[:,:-1]
| Year | 1979 | 1989 |
|---|---|---|
| NUTS3 | ||
| Central | 198.0 | 213.0 |
| N_East | 270.0 | 278.0 |
| North | 666.0 | 744.0 |
| South | 437.0 | 480.0 |
| West | 211.0 | 231.0 |
| All | 1782.0 | 1947.0 |
dat1 = dat.groupby(['NRTAK','NUTS3','aasta'])['ttasu'].agg(['mean']).reset_index()
dat1 = dat1.dropna()
dat1
fig = px.bar(dat1.sort_values('mean'), x="NUTS3", y="mean", color="NRTAK", facet_col='aasta', barmode='group')
# ,color_discrete_sequence=px.colors.qualitative.Rainbow)
fig.show()
import statsmodels.formula.api as sm
# dat['logttasu'] = np.log(dat.ttasu)
model = sm.ols('ttasu ~ NRTAK + NUTS3 + C(aasta)', data=dat) # + sugu + haridus
res = model.fit()
print(res.summary())
OLS Regression Results
==============================================================================
Dep. Variable: ttasu R-squared: 0.137
Model: OLS Adj. R-squared: 0.136
Method: Least Squares F-statistic: 139.0
Date: Tue, 14 Jun 2022 Prob (F-statistic): 0.00
Time: 08:39:06 Log-Likelihood: -71219.
No. Observations: 12229 AIC: 1.425e+05
Df Residuals: 12214 BIC: 1.426e+05
Df Model: 14
Covariance Type: nonrobust
=========================================================================================
coef std err t P>|t| [0.025 0.975]
-----------------------------------------------------------------------------------------
Intercept 191.1168 2.391 79.917 0.000 186.429 195.804
NRTAK[T.Construction] 15.6540 2.929 5.345 0.000 9.913 21.395
NRTAK[T.Education] -52.4408 3.274 -16.019 0.000 -58.858 -46.024
NRTAK[T.Healthcare] -72.9940 3.858 -18.920 0.000 -80.556 -65.432
NRTAK[T.Industry] -20.1491 2.174 -9.267 0.000 -24.411 -15.887
NRTAK[T.OMP] -64.9256 6.798 -9.551 0.000 -78.250 -51.601
NRTAK[T.PubAdmin] -41.4516 4.856 -8.536 0.000 -50.970 -31.933
NRTAK[T.Services] -60.5556 2.909 -20.820 0.000 -66.257 -54.854
NRTAK[T.Trade] -61.0049 3.423 -17.824 0.000 -67.714 -54.296
NRTAK[T.Transport] -14.4734 2.977 -4.862 0.000 -20.309 -8.638
NUTS3[T.N_East] -16.7912 2.918 -5.754 0.000 -22.511 -11.071
NUTS3[T.North] -8.9463 2.535 -3.530 0.000 -13.915 -3.978
NUTS3[T.South] -11.0883 2.466 -4.496 0.000 -15.923 -6.254
NUTS3[T.West] -9.7210 2.715 -3.580 0.000 -15.044 -4.398
C(aasta)[T.1981] 36.4765 1.502 24.278 0.000 33.531 39.422
==============================================================================
Omnibus: 3768.709 Durbin-Watson: 1.958
Prob(Omnibus): 0.000 Jarque-Bera (JB): 15196.571
Skew: 1.483 Prob(JB): 0.00
Kurtosis: 7.586 Cond. No. 11.9
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.